To widen Open Context's interoperability with other scientific information systems, we are starting to cross-reference the biological taxonomy categories published by Open Context with GBIF (Global Biodiversity Information Facility, https://gbif.org) identifiers.
To start this process, this Jupyter notebook finds GBIF identifiers that correspond with EOL (Encyclopedia of Life, https://eol.org) identifiers already used by Open Context.
The datasets used and created by this notebook are stored in the /files/eol directory. The files used and created by this notebook include:

- eol-gbif.csv.gz (The source of the data is https://opendata.eol.org/dataset/identifier-map, dated 2019-12-20. The data is filtered to include only records where the resource_id is 767, which corresponds to GBIF; a sketch of this filtering step follows the list.)
- oc-eol-uris.csv (This is a CSV dump, current as of 2020-01-15, from the Open Context link_entities model of records where URIs started with 'http://eol.org'. It represents all of the EOL entities that Open Context uses to cross-reference project-specific biological taxonomic concepts.)
- oc-eol-gbif-with-missing.csv (This is the scratch, working data file that joins oc-eol-uris.csv data with records from eol-gbif.csv.gz. Execution of this notebook creates this file and periodically updates it with names and new IDs resulting from requests to the GBIF API.)
- oc-eol-gbif.csv (This notebook generates this file, which describes equivalences between the EOL items used by Open Context and corresponding GBIF identifiers.)
- oc-eol-no-gbif.csv (This notebook generates this file, which describes EOL items used by Open Context that lack corresponding GBIF identifiers. These records will probably need manual curation.)
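For reference, here is a minimal sketch of how eol-gbif.csv.gz could be derived from the EOL identifier map download. The input filename identifier_map.csv.gz is an assumption; only the resource_id 767 filter comes from the description above.

In [ ]:
import pandas as pd

# Hypothetical reproduction of the eol-gbif.csv.gz filtering step.
# Assumes the EOL identifier map was downloaded as
# 'identifier_map.csv.gz' and includes a resource_id column.
df_map = pd.read_csv('identifier_map.csv.gz')

# Keep only the records where resource_id is 767 (GBIF).
df_gbif_map = df_map[df_map['resource_id'] == 767].copy()
df_gbif_map.to_csv('eol-gbif.csv.gz', index=False, compression='gzip')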
In [1]:
import json
import os
import requests
from time import sleep
import numpy as np
import pandas as pd
# Get the root_path for this jupyter notebook repo.
repo_path = os.path.dirname(os.path.abspath(os.getcwd()))
# Path for the (gzip compressed) CSV data dump from EOL
# with GBIF names and EOL IDs.
eol_gbif_names_path = os.path.join(
    repo_path, 'files', 'eol', 'eol-gbif.csv.gz'
)
# Path for the CSV data from Open Context of all EOL
# URIs and IDs currently referenced by Open Context.
oc_eol_path = os.path.join(
    repo_path, 'files', 'eol', 'oc-eol-uris.csv'
)
# Path for the CSV data that has EOL URIs used by Open Context
# with GBIF URIs and missing GBIF URIs.
oc_eol_gbif_w_missing_path = os.path.join(
    repo_path, 'files', 'eol', 'oc-eol-gbif-with-missing.csv'
)
# Path for CSV data that has EOL URIs used by Open Context and
# corresponding GBIF URIs and names.
oc_eol_gbif_path = os.path.join(
    repo_path, 'files', 'eol', 'oc-eol-gbif.csv'
)
# Path for CSV data that has EOL URIs used by Open Context
# but no corresponding GBIF URIs.
oc_eol_no_gbif_path = os.path.join(
    repo_path, 'files', 'eol', 'oc-eol-no-gbif.csv'
)
Now define some functions that we'll use repeatedly.
In [2]:
def save_result_files(
    df,
    path_with_gbif=oc_eol_gbif_path,
    path_without_gbif=oc_eol_no_gbif_path
):
    """Saves files for outputs with and without GBIF ids"""
    # Save the interim results with matches.
    gbif_index = ~df['gbif_id'].isnull()
    df_ok_gbif = df[gbif_index].copy().reset_index(drop=True)
    print('Saving EOL matches with GBIF...')
    df_ok_gbif.to_csv(path_with_gbif, index=False)
    no_gbif_index = df['gbif_id'].isnull()
    df_no_gbif = df[no_gbif_index].copy().reset_index(drop=True)
    print('Saving EOL records without GBIF matches...')
    df_no_gbif.to_csv(path_without_gbif, index=False)


def get_gbif_canonical_name(gbif_id, sleep_secs=0.25):
    """Get the canonical name from the GBIF API for an ID"""
    sleep(sleep_secs)
    url = 'https://api.gbif.org/v1/species/{}'.format(gbif_id)
    print('Get URL: {}'.format(url))
    r = requests.get(url)
    r.raise_for_status()
    json_r = r.json()
    return json_r.get('canonicalName')


def get_gbif_vernacular_name(gbif_id, lang_code='eng', sleep_secs=0.25):
    """Get the first vernacular name from the GBIF API for an ID"""
    sleep(sleep_secs)
    url = 'https://api.gbif.org/v1/species/{}/vernacularNames'.format(
        gbif_id
    )
    print('Get URL: {}'.format(url))
    r = requests.get(url)
    r.raise_for_status()
    json_r = r.json()
    vern_name = None
    for result in json_r.get('results', []):
        if result.get('language') != lang_code:
            continue
        vern_name = result.get('vernacularName')
        if vern_name is not None:
            break
    return vern_name


def add_names_to_gbif_ids(
    df,
    limit_by_method=None,
    save_path=oc_eol_gbif_w_missing_path
):
    """Adds names to GBIF ids where those names are missing"""
    gbif_index = ~df['gbif_id'].isnull()
    df.loc[gbif_index, 'gbif_uri'] = df[gbif_index]['gbif_id'].apply(
        lambda x: 'https://www.gbif.org/species/{}'.format(int(x))
    )
    df.to_csv(save_path, index=False)
    # Now use the GBIF API to fetch canonical names for GBIF items
    # where we do not yet have those names.
    need_can_name_index = (df['gbif_can_name'].isnull() & gbif_index)
    if limit_by_method:
        need_can_name_index &= (df['gbif_rel_method'] == limit_by_method)
    df.loc[need_can_name_index, 'gbif_can_name'] = df[need_can_name_index]['gbif_id'].apply(
        lambda x: get_gbif_canonical_name(int(x))
    )
    df.to_csv(save_path, index=False)
    # Now use the GBIF API to fetch vernacular names for GBIF items
    # where we do not yet have those names.
    need_vern_name_index = (df['gbif_vern_name'].isnull() & gbif_index)
    if limit_by_method:
        need_vern_name_index &= (df['gbif_rel_method'] == limit_by_method)
    df.loc[need_vern_name_index, 'gbif_vern_name'] = df[need_vern_name_index]['gbif_id'].apply(
        lambda x: get_gbif_vernacular_name(int(x))
    )
    df.to_csv(save_path, index=False)
    return df


def get_gbif_id_by_name(name, sleep_secs=0.25, allow_alts=False):
    """Get a GBIF ID by searching for a name via the GBIF API"""
    sleep(sleep_secs)
    if ' ' in name:
        # Only use the first 2 parts of a name with a space.
        name_sp = name.split(' ')
        if len(name_sp[0]) <= 2 or len(name_sp[1]) <= 2:
            return np.nan
        # Joining with a '+' also handles URL encoding of the space.
        name = name_sp[0] + '+' + name_sp[1]
    url = 'https://api.gbif.org/v1/species/match?verbose=true&dataset_key=d7dddbf4-2cf0-4f39-9b2a-bb099caae36c'
    url += '&name={}'.format(name)
    print('Get URL: {}'.format(url))
    r = requests.get(url)
    r.raise_for_status()
    json_r = r.json()
    gbif_id = json_r.get('usageKey')
    if gbif_id is not None:
        return int(gbif_id)
    elif not allow_alts:
        # We don't have an ID, and we're not allowing alternatives.
        return np.nan
    # Below is for multiple equal matches (allow_alts is True here).
    if json_r.get('matchType') != 'NONE':
        # We don't have an exact match.
        return np.nan
    alts = json_r.get('alternatives', [])
    if len(alts) == 0:
        # We don't have alternatives.
        return np.nan
    # Choose the first alternative.
    gbif_id = alts[0].get('usageKey')
    if not gbif_id:
        return np.nan
    return int(gbif_id)
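Before running the pipeline, it can help to smoke test these helpers against a well-known taxon. This check is illustrative and assumes network access; in the GBIF backbone, taxon 212 is Aves (birds).

In [ ]:
# Illustrative smoke test of the helper functions (not part of the
# pipeline). GBIF backbone taxon 212 is Aves (birds).
print(get_gbif_canonical_name(212))    # expected: Aves
print(get_gbif_vernacular_name(212))   # first 'eng' vernacular name, e.g. Birds
print(get_gbif_id_by_name('Aves'))     # expected: 212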
In [3]:
if not os.path.isfile(oc_eol_gbif_w_missing_path):
    # We don't have the oc-eol-gbif-with-missing data,
    # so we need to make it.
    df_eol_gbif_names = pd.read_csv(eol_gbif_names_path)
    df_oc_eol = pd.read_csv(oc_eol_path, encoding='utf-8')
    df_oc_eol.rename(columns={'id': 'page_id'}, inplace=True)
    df = df_oc_eol.merge(df_eol_gbif_names, on=['page_id'], how='left')
    print('We have {} rows of EOL uris in OC to relate to GBIF'.format(
        len(df.index)
    ))
    df.sort_values(by=['page_id'], inplace=True)
    # Now pull out the GBIF integer ID.
    df['gbif_id'] = pd.to_numeric(
        df['resource_pk'],
        errors='coerce',
        downcast='integer'
    )
    df['gbif_rel_method'] = np.nan
    df['gbif_uri'] = np.nan
    df['gbif_can_name'] = np.nan
    df['gbif_vern_name'] = np.nan
    # Now note that the rows where the gbif_id is not null
    # come from the EOL-GBIF names dataset.
    gbif_index = ~df['gbif_id'].isnull()
    df.loc[gbif_index, 'gbif_rel_method'] = 'EOL-GBIF-names'
    df.to_csv(oc_eol_gbif_w_missing_path, index=False)
In [4]:
# Load our working dataframe, now that we know
# it has been created.
df = pd.read_csv(oc_eol_gbif_w_missing_path)
Now that we have a main working dataset, we need to add canonical and vernacular names to the rows with GBIF IDs.
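For context, get_gbif_vernacular_name assumes the vernacularNames endpoint returns a JSON object with a results list of name records. Here is a trimmed example of that shape (the values are hypothetical):

In [ ]:
# Hypothetical, trimmed response from
# https://api.gbif.org/v1/species/{id}/vernacularNames
# showing the shape that get_gbif_vernacular_name() parses.
example_response = {
    'results': [
        {'vernacularName': 'oiseaux', 'language': 'fra'},
        {'vernacularName': 'Birds', 'language': 'eng'},
    ],
    'endOfRecords': True,
}
# The parser skips non-matching languages and returns the first
# 'eng' entry: 'Birds'.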
In [5]:
# Use GBIF API calls to add names to records with GBIF IDs but currently
# missing names.
df = add_names_to_gbif_ids(df, save_path=oc_eol_gbif_w_missing_path)
Now that we have added GBIF names to rows that have GBIF IDs, we will save our interim results.
In [6]:
# Save the Open Context EOL URIs with clear GBIF matches,
# as well as a file without matches
save_result_files(df)
At this point, we will still be missing GBIF IDs for many EOL records. So now, we will use the GBIF search API to find related GBIF IDs.
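For context, get_gbif_id_by_name reads three fields from the species match response: usageKey, matchType, and alternatives. A trimmed, hypothetical example of an ambiguous response (all values are made up) shows the case that allow_alts=True handles:

In [ ]:
# Hypothetical, trimmed response from
# https://api.gbif.org/v1/species/match for an ambiguous name.
# There is no top-level 'usageKey', matchType is 'NONE', and
# alternatives hold the candidate matches.
example_match = {
    'matchType': 'NONE',
    'note': 'Multiple equal matches',
    'alternatives': [
        {'usageKey': 1234567, 'scientificName': 'Example genus A'},
        {'usageKey': 7654321, 'scientificName': 'Example genus B'},
    ],
}
# With allow_alts=True, get_gbif_id_by_name() returns the first
# alternative's usageKey (1234567); otherwise it returns np.nan.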
In [7]:
# Now try to look up GBIF items where we don't have
# clear matches.
look_ups = [
    # Tuples are:
    # (field_for_name, allow_alts, gbif_rel_method,),
    ('preferred_canonical_for_page', False, 'EOL-pref-page-GBIF-exact-search',),
    ('preferred_canonical_for_page', True, 'EOL-pref-page-GBIF-search-w-alts',),
    ('label', False, 'EOL-OC-label-GBIF-exact-search',),
    ('label', True, 'EOL-OC-label-GBIF-search-w-alts',),
]
# Now iterate through these look_up configs.
for field_for_name, allow_alts, gbif_rel_method in look_ups:
    gbif_index = ~df['gbif_id'].isnull()
    ok_eol = df[gbif_index]['uri'].unique().tolist()
    no_gbif_index = (df['gbif_id'].isnull() & ~df['uri'].isin(ok_eol))
    # Get the index where there's a name in the field_for_name column,
    # but where we have no GBIF ID yet.
    no_gbif_index_w_name = (~df[field_for_name].isnull() & no_gbif_index)
    # Use the GBIF API to look up GBIF IDs.
    df.loc[no_gbif_index_w_name, 'gbif_id'] = df[no_gbif_index_w_name][field_for_name].apply(
        lambda x: get_gbif_id_by_name(x, allow_alts=allow_alts)
    )
    # The new GBIF IDs will have a gbif_rel_method of null. Make sure that
    # we record the gbif_rel_method at this point.
    new_gbif_id_index = (~df['gbif_id'].isnull() & df['gbif_rel_method'].isnull())
    df.loc[new_gbif_id_index, 'gbif_rel_method'] = gbif_rel_method
    # Save the interim results.
    df.to_csv(oc_eol_gbif_w_missing_path, index=False)
    # Now add names to the rows where we just found new IDs.
    df = add_names_to_gbif_ids(
        df,
        limit_by_method=gbif_rel_method,
        save_path=oc_eol_gbif_w_missing_path
    )
    # Save the interim results, again.
    df.to_csv(oc_eol_gbif_w_missing_path, index=False)
    # Save the interim results with matches to a file
    # and without matches to another file.
    save_result_files(df)
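Finally, as an optional sanity check (not part of the pipeline above), we can tally how the matches were made and how many EOL records still lack a GBIF ID:

In [ ]:
# Optional sanity check: count rows by the method used to relate
# them to GBIF, and count rows still lacking a GBIF ID.
print(df['gbif_rel_method'].value_counts(dropna=False))
print('Rows still missing a GBIF ID: {}'.format(
    df['gbif_id'].isnull().sum()
))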